#!/bin/bash
#
# Generate Sysdump
# creates a snapshot of system state for debugging later.
#

# Treat expansion of an unset variable as an error.
set -u

# Exit codes handed to abort().
ERROR_TAR_FAILED=5
ERROR_PROCFS_SAVE_FAILED=6
ERROR_INVALID_ARGUMENT=10

# External tools. The -n option replaces these with "echo <tool>" so that
# commands are printed instead of executed.
TAR="tar"
MKDIR="mkdir"
RM="rm"
LN="ln"
GZIP="gzip"
CP="cp"
MV="mv"
GREP="grep"
TOUCH="touch"

# Run-time switches, adjusted by the command line options.
V=""
NOOP="false"
DO_COMPRESS="true"
CMD_PREFIX=""
SINCE_DATE="@0" # default is set to January 1, 1970 at 00:00:00 GMT
REFERENCE_FILE="/tmp/reference"

# Output locations: everything is staged below $TARDIR and appended to $TARFILE.
BASE="sonic_dump_$(hostname)_$(date +%Y%m%d_%H%M%S)"
DUMPDIR="/var/dump"
TARDIR="${DUMPDIR}/${BASE}"
TARFILE="${TARDIR}.tar"
LOGDIR="${TARDIR}/dump"

###############################################################################
# Runs a command and saves its output to the incrementally built tar.
# The output (stdout and stderr) is written to $LOGDIR/<filename>, appended to
# $TARFILE and the staged file is then removed. In noop mode the command is
# only echoed.
# Globals:
#  LOGDIR
#  BASE
#  MKDIR
#  TAR
#  TARFILE
#  DUMPDIR
#  V
#  RM
#  NOOP
#  ERROR_TAR_FAILED
# Arguments:
#  cmd: The command to run. Make sure that arguments with spaces have quotes
#  filename: the filename to save the output as in $BASE/dump
#  do_gzip: (OPTIONAL) true or false. Should the output be gzipped
# Returns:
#  Status of the final cleanup. Exits the script through abort with
#  ERROR_TAR_FAILED when the tar append fails.
###############################################################################
save_cmd() {
    local cmd="$1"
    local filename=$2
    local do_gzip=${3:-false}
    local filepath="${LOGDIR}/$filename"
    local tarpath="${BASE}/dump/$filename"

    [ -d "$LOGDIR" ] || $MKDIR $V -p "$LOGDIR"

    if $do_gzip; then
        tarpath="${tarpath}.gz"
        filepath="${filepath}.gz"
    fi

    # eval required here to re-evaluate the $cmd properly at runtime
    # This is required if $cmd has quoted strings that should be bunched
    # as one argument, e.g. vtysh -c "COMMAND HERE" needs to have
    # "COMMAND HERE" bunched together as 1 arg to vtysh -c
    if $NOOP; then
        if $do_gzip; then
            echo "eval $cmd 2>&1 | gzip -c > '${filepath}'"
        else
            echo "eval $cmd &> '$filepath'"
        fi
    elif $do_gzip; then
        eval "$cmd" 2>&1 | gzip -c > "${filepath}"
    else
        eval "$cmd" &> "$filepath"
    fi

    # abort must run in the current shell: inside a ( ... ) subshell its exit
    # would only leave the subshell and the script would carry on.
    # $TAR, $RM and $V stay unquoted: noop mode sets them to "echo <tool>" and
    # $V may be empty.
    if ! $TAR $V -rhf "$TARFILE" -C "$DUMPDIR" "$tarpath"; then
        abort "${ERROR_TAR_FAILED}" "tar append operation failed. Aborting to prevent data loss."
    fi
    $RM $V -rf "$filepath"
}

###############################################################################
# Wraps a vtysh query in "vtysh -c '<query>'" and hands it to save_cmd so the
# output ends up in the incrementally built tar.
# Globals:
#  None
# Arguments:
#  cmd: the vtysh command to run. This should NOT include vtysh -c
#  filename: The filename to save the output as.
#  do_gzip: (OPTIONAL) true or false. Should the output be gzipped
# Returns:
#  None
###############################################################################
save_vtysh() {
    local query=$1
    local out_name=$2
    local compress=${3:-false}
    local full_cmd="vtysh -c '${query}'"

    save_cmd "${full_cmd}" "${out_name}" $compress
}

###############################################################################
# Prefixes the given arguments with 'ip' and hands the command to save_cmd so
# the output ends up in the incrementally built tar.
# Globals:
#  None
# Arguments:
#  cmd: the ip command to run sans 'ip'
#  filename: Files will be named 'ip.<filename>'
#  do_gzip: (OPTIONAL) true or false. Should the output be gzipped
# Returns:
#  None
###############################################################################
save_ip() {
    local subcommand=$1
    local out_name="ip.$2"
    local compress=${3:-false}
    local full_cmd="ip ${subcommand}"

    save_cmd "${full_cmd}" "${out_name}" $compress
}

###############################################################################
# Iterates all neighbors and runs save_vtysh to save each neighbor's
# advertised-routes and received-routes.
# Neighbor addresses are taken from the "BGP neighbor is <addr>," lines of the
# vtysh neighbor listings; for the IPv6 listing only addresses containing ':'
# are used.
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
###############################################################################
save_bgp_neighbor() {
    # Keep the working variables out of the global namespace.
    local neighbor_list_v4 neighbor_list_v6 word

    neighbor_list_v4=$(vtysh -c "show ip bgp neighbors" \
        | awk -F '[, ]' '/BGP neighbor is/ {print $4}')
    # Word splitting is intended: one address per word.
    for word in $neighbor_list_v4; do
        save_vtysh "show ip bgp neighbors $word advertised-routes" "ip.bgp.neighbor.$word.adv"
        save_vtysh "show ip bgp neighbors $word routes" "ip.bgp.neighbor.$word.rcv"
    done

    neighbor_list_v6=$(vtysh -c "show bgp ipv6 neighbors" \
        | awk -F '[, ]' '/BGP neighbor is/ && index($4, ":") {print $4}')
    for word in $neighbor_list_v6; do
        save_vtysh "show bgp ipv6 neighbors $word advertised-routes" "ipv6.bgp.neighbor.$word.adv"
        save_vtysh "show bgp ipv6 neighbors $word routes" "ipv6.bgp.neighbor.$word.rcv"
    done
}

###############################################################################
# Saves the NAT related state: iptables nat table, conntrack entries (NAT-only
# and all, each with a 'wc' count) and the output of 'show nat config'.
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
###############################################################################
save_nat_info() {
    # Pairs of <command> <output filename>, saved in this order.
    local -a nat_cmds=(
        "iptables -t nat -nv -L"  "nat.iptables"
        "conntrack -j -L"         "nat.conntrack"
        "conntrack -j -L | wc"    "nat.conntrackcount"
        "conntrack -L"            "nat.conntrackall"
        "conntrack -L | wc"       "nat.conntrackallcount"
        "show nat config"         "nat.config"
    )
    local idx

    for ((idx = 0; idx < ${#nat_cmds[@]}; idx += 2)); do
        save_cmd "${nat_cmds[idx]}" "${nat_cmds[idx + 1]}"
    done
}

###############################################################################

###############################################################################
# Given list of proc files, saves proc files to tar.
# The files are copied recursively into $TARDIR/proc, that directory is
# appended to $TARFILE and then removed again. The steps are chained with &&,
# so the first failure stops the sequence.
# Globals:
#  V
#  TARDIR
#  MKDIR
#  CP
#  DUMPDIR
#  TAR
#  RM
#  BASE
#  TARFILE
# Arguments:
#  *procfiles: variable-length list of proc file paths to save
# Returns:
#  0 when every step succeeded, otherwise the status of the failing step
###############################################################################
save_proc() {
    # "$@" keeps every path a separate word; flattening the list into one
    # string would re-split paths that contain whitespace.
    $MKDIR $V -p "$TARDIR/proc" \
        && $CP $V -r "$@" "$TARDIR/proc" \
        && $TAR $V -rhf "$TARFILE" -C "$DUMPDIR" --mode=+rw "$BASE/proc" \
        && $RM $V -rf "$TARDIR/proc"
}

###############################################################################
# Saves the output of 'sonic-db-dump -n <DB name> -y' through save_cmd.
# Arguments:
#  DB name: name of the DB to dump; the output is saved as '<DB name>.json'
# Returns:
#  None
###############################################################################
save_redis() {
    local -r database=$1
    local -r dump_cmd="sonic-db-dump -n '${database}' -y"

    save_cmd "${dump_cmd}" "${database}.json"
}

###############################################################################
# Runs a 'show platform' command, appends the output to 'filename' and adds
# the file to the incrementally built tar.
# The staged file is kept after the tar update so that later calls with the
# same filename keep appending to it. In noop mode the command is only echoed.
# Globals:
#  LOGDIR
#  BASE
#  MKDIR
#  TAR
#  TARFILE
#  DUMPDIR
#  V
#  NOOP
#  ERROR_TAR_FAILED
# Arguments:
#  type: the type of platform information
#  filename: the filename to save the output as in $BASE/dump
# Returns:
#  None. Exits the script through abort with ERROR_TAR_FAILED when the tar
#  update fails.
###############################################################################
save_platform() {
    local type="$1"
    local filename=$2
    local filepath="${LOGDIR}/$filename"
    local tarpath="${BASE}/dump/$filename"

    [ -d "$LOGDIR" ] || $MKDIR $V -p "$LOGDIR"

    if $NOOP; then
        # Nothing was created in noop mode, so only report what would run.
        echo "eval show platform $type &>> '$filepath'"
    else
        eval "show platform $type" &>> "$filepath"
        # Separator line between the outputs of consecutive calls.
        echo $'\r' >> "$filepath"
    fi

    # abort must run in the current shell: inside a ( ... ) subshell its exit
    # would only leave the subshell and the script would carry on.
    if ! $TAR $V -uhf "$TARFILE" -C "$DUMPDIR" "$tarpath"; then
        abort "${ERROR_TAR_FAILED}" "tar append operation failed. Aborting to prevent data loss."
    fi
}

###############################################################################
# Saves an existing file into the incrementally built tar.
# The file is gzipped (default) or copied into $TARDIR/<base_dir>, the staged
# copy is appended to $TARFILE and then removed. In noop mode the gzip/cp
# command is only echoed.
# Globals:
#  TARDIR
#  BASE
#  MKDIR
#  TAR
#  TARFILE
#  DUMPDIR
#  V
#  RM
#  NOOP
#  ERROR_TAR_FAILED
# Arguments:
#  filename: the full path of the file to save. A glob pattern is accepted.
#  base_dir: the directory in $TARDIR/ to stage the file
#  do_gzip: (OPTIONAL) true or false. Should the output be gzipped
# Returns:
#  1 when the source file does not exist (nothing is added to the tar),
#  otherwise the status of the final cleanup. Exits the script through abort
#  with ERROR_TAR_FAILED when the tar append fails.
###############################################################################
save_file() {
    local orig_path=$1
    local supp_dir=$2
    local do_gzip=${3:-true}
    local staged_name
    # $orig_path is deliberately left unquoted in this function: main passes
    # a glob pattern (/tmp/hw-mgmt-dump*) that has to be expanded here.
    staged_name=$(basename $orig_path)
    local gz_path="$TARDIR/$supp_dir/$staged_name"
    local tar_path="${BASE}/$supp_dir/$staged_name"

    # An optional source that is absent is skipped; it must not end up as an
    # empty .gz member or as a tar failure.
    if ! $NOOP && [ ! -e $orig_path ]; then
        echo "WARNING: $orig_path does not exist. Skipping." >&2
        return 1
    fi

    [ -d "$TARDIR/$supp_dir" ] || $MKDIR $V -p "$TARDIR/$supp_dir"

    if $do_gzip; then
        gz_path="${gz_path}.gz"
        tar_path="${tar_path}.gz"
        if $NOOP; then
            echo "gzip -c $orig_path > $gz_path"
        else
            gzip -c $orig_path > "$gz_path"
        fi
    else
        if $NOOP; then
            echo "cp $orig_path $gz_path"
        else
            cp $orig_path "$gz_path"
        fi
    fi

    # abort must run in the current shell: inside a ( ... ) subshell its exit
    # would only leave the subshell and the script would carry on.
    if ! $TAR $V -rhf "$TARFILE" -C "$DUMPDIR" "$tar_path"; then
        abort "${ERROR_TAR_FAILED}" "tar append operation failed. Aborting to prevent data loss."
    fi
    $RM $V -f "$gz_path"
}

###############################################################################
# find_files routine
# Lists the regular files below a directory (following symlinks) that were
# modified after SINCE_DATE. REFERENCE_FILE is touched with that date and used
# as the comparison point for 'find -newer'.
# Globals:
#  SINCE_DATE: list files only newer than given date
#  REFERENCE_FILE: the file to be created as a reference to compare modification time
#  TOUCH
# Arguments:
#  directory: directory to search files in
# Outputs:
#  The matching paths on a single line, separated by spaces
# Returns:
#  None
###############################################################################
find_files() {
    local -r directory=$1

    # stdout is this function's result. In noop mode $TOUCH is "echo touch",
    # so its output is sent to stderr to keep it out of the file list.
    $TOUCH --date="${SINCE_DATE}" "${REFERENCE_FILE}" >&2

    # The command substitution is unquoted on purpose: it joins find's
    # newline-separated output into one space-separated line.
    echo $(find -L "$directory" -type f -newer "${REFERENCE_FILE}")
}

###############################################################################
# disable_logrotate routine
# Prefixes every line of /etc/cron.d/logrotate that mentions
# /usr/sbin/logrotate with '#', editing the file in place.
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
###############################################################################
disable_logrotate() {
    local -r cron_file=/etc/cron.d/logrotate

    # '|' is used as the delimiter so the path needs no escaping.
    sed -i '\|/usr/sbin/logrotate|s|^|#|g' "${cron_file}"
}

###############################################################################
# enable_logrotate routine
# Strips all leading '#' characters from every line of /etc/cron.d/logrotate
# that mentions /usr/sbin/logrotate, editing the file in place.
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
###############################################################################
enable_logrotate() {
    local -r cron_file=/etc/cron.d/logrotate

    # '|' is used as the delimiter so the path needs no escaping.
    sed -i '\|/usr/sbin/logrotate|s|^#*||g' "${cron_file}"
}

###############################################################################
# Main generate_dump routine
# Collects, in this order: a copy of this script, selected /proc files, the
# output of system/platform/network/BGP/NAT/DB/docker commands, ASIC specific
# dumps, /etc, log files, core files, kernel crash files and the hw-mgmt dump.
# Everything is appended to $TARFILE, which is gzipped at the end unless
# DO_COMPRESS is false. The resulting archive path is printed on stdout.
# Globals:
#  All of them.
# Arguments:
#  None
# Returns:
#  None
###############################################################################
main() {
    local file i

    if [ "$(whoami)" != root ] && ! $NOOP; then
        echo "$0: must be run as root (or in sudo)" >&2
        exit 10
    fi
    ${CMD_PREFIX}renice +5 -p $$ >> /dev/null
    ${CMD_PREFIX}ionice -c 2 -n 5 -p $$ >> /dev/null

    $MKDIR $V -p "$TARDIR"

    # Start with this script so its obvious what code is responsible
    $LN $V -s /usr/bin/generate_dump "$TARDIR"
    $TAR $V -chf "$TARFILE" -C "$DUMPDIR" "$BASE"
    # ln names the link after its target, i.e. $TARDIR/generate_dump
    $RM $V -f "$TARDIR/generate_dump"

    # Capture /proc state early
    save_proc /proc/buddyinfo /proc/cmdline /proc/consoles \
        /proc/cpuinfo /proc/devices /proc/diskstats /proc/dma \
        /proc/interrupts /proc/iomem /proc/ioports /proc/kallsyms \
        /proc/loadavg /proc/locks /proc/meminfo /proc/misc \
        /proc/modules /proc/self/mounts /proc/self/net \
        /proc/pagetypeinfo /proc/partitions /proc/sched_debug /proc/slabinfo \
        /proc/softirqs /proc/stat /proc/swaps /proc/sysvipc /proc/timer_list \
        /proc/uptime /proc/version /proc/vmallocinfo /proc/vmstat \
        /proc/zoneinfo \
        || abort "${ERROR_PROCFS_SAVE_FAILED}" "Proc saving operation failed. Aborting for safety."

    save_cmd "systemd-analyze blame" "systemd.analyze.blame"
    save_cmd "systemd-analyze dump" "systemd.analyze.dump"
    save_cmd "systemd-analyze plot" "systemd.analyze.plot.svg"

    # All platform outputs are accumulated in the single file 'platform'
    save_platform "syseeprom" "platform"
    save_platform "psustatus" "platform"
    save_platform "ssdhealth" "platform"
    save_platform "temperature" "platform"
    save_platform "fan" "platform"

    save_cmd "show version" "version"
    save_cmd "show platform summary" "platform.summary"
    save_cmd "cat /host/machine.conf" "machine.conf"
    save_cmd "docker stats --no-stream" "docker.stats"

    save_cmd "sensors" "sensors"
    save_cmd "lspci -vvv -xx" "lspci"
    save_cmd "lsusb -v" "lsusb"
    save_cmd "sysctl -a" "sysctl"
    save_ip "link" "link"
    save_ip "addr" "addr"
    save_ip "rule" "rule"
    save_ip "route show table all" "route"
    save_ip "neigh" "neigh"

    save_vtysh "show ip bgp summary" "bgp.summary"
    save_vtysh "show ip bgp neighbors" "bgp.neighbors"
    save_vtysh "show ip bgp" "bgp.table"
    save_vtysh "show bgp ipv6 summary" "bgp.ipv6.summary"
    save_vtysh "show bgp ipv6 neighbors" "bgp.ipv6.neighbors"
    save_vtysh "show bgp ipv6" "bgp.ipv6.table"
    save_bgp_neighbor

    save_cmd "show interface status" "interface.status"
    save_cmd "show interface counters" "interface.counters"
    save_cmd "show interface transceiver presence" "interface.xcvrs.presence"
    save_cmd "show interface transceiver eeprom --dom" "interface.xcvrs.eeprom"

    save_cmd "lldpctl" "lldpctl"

    save_cmd "ps aux" "ps.aux"
    save_cmd "free" "free"
    save_cmd "vmstat 1 5" "vmstat"
    save_cmd "vmstat -m" "vmstat.m"
    save_cmd "vmstat -s" "vmstat.s"
    save_cmd "mount" "mount"
    save_cmd "df" "df"
    save_cmd "dmesg" "dmesg"

    save_nat_info

    save_redis "APPL_DB"
    save_redis "ASIC_DB"
    save_redis "COUNTERS_DB"
    save_redis "CONFIG_DB"
    save_redis "FLEX_COUNTER_DB"
    save_redis "STATE_DB"

    save_cmd "docker ps -a" "docker.ps"
    save_cmd "docker top pmon" "docker.pmon"

    save_cmd "docker exec -it syncd saidump" "saidump"

    local asic
    asic="$(/usr/local/bin/sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)"
    if [[ "$asic" = "mellanox" ]]; then
        local sai_dump_filename
        sai_dump_filename="/tmp/sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")"
        ${CMD_PREFIX}docker exec -it syncd saisdkdump -f "$sai_dump_filename"
        # Copy the dump out of the syncd container into /tmp on the host
        ${CMD_PREFIX}docker exec syncd tar Ccf "$(dirname "$sai_dump_filename")" - "$(basename "$sai_dump_filename")" | tar Cxf /tmp/ -
        save_file "$sai_dump_filename" sai_sdk_dump true

        local mst_dump_filename="/tmp/mstdump"
        local max_dump_count="3"
        for i in $(seq 1 $max_dump_count); do
            ${CMD_PREFIX}/usr/bin/mstdump /dev/mst/mt*conf0 > "${mst_dump_filename}${i}"
            save_file "${mst_dump_filename}${i}" mstdump true
        done
    fi

    if [ "$asic" = "broadcom" ]; then
        save_cmd "bcmcmd -t5 version" "broadcom.version"
        save_cmd "bcmcmd -t5 soc" "broadcom.soc"
        save_cmd "bcmcmd -t5 ps" "broadcom.ps"
        save_cmd "cat /proc/bcm/knet/debug" "broadcom.knet.debug"
        save_cmd "cat /proc/bcm/knet/dma" "broadcom.knet.dma"
        save_cmd "cat /proc/bcm/knet/dstats" "broadcom.knet.dstats"
        save_cmd "cat /proc/bcm/knet/link" "broadcom.knet.link"
        save_cmd "cat /proc/bcm/knet/rate" "broadcom.knet.rate"
        save_cmd "cat /proc/bcm/knet/stats" "broadcom.knet.stats"
        save_cmd "bcmcmd \"l3 nat_ingress show\"" "broadcom.nat.ingress"
        save_cmd "bcmcmd \"l3 nat_egress show\"" "broadcom.nat.egress"
    fi

    if $GREP -qi "aboot_platform=.*arista" /host/machine.conf; then
        save_cmd "cat /proc/scd" "scd"
        save_cmd "arista syseeprom" "arista.syseeprom"
        save_cmd "arista dump" "arista.dump"
    fi

    # Start from an empty staging directory and expose /etc through a symlink
    # that tar dereferences (-h)
    $RM $V -rf "$TARDIR"
    $MKDIR $V -p "$TARDIR"
    $MKDIR $V -p "$LOGDIR"
    $LN $V -s /etc "$TARDIR/etc"

    # NOTE(review): abort runs inside the subshell here, so a non-zero tar
    # status only prints the message and skips the cleanup; the script keeps
    # going. Left unchanged on purpose: presumably tar can report errors for
    # individual entries of a live /etc, which should not discard the dump.
    ($TAR $V -rhf "$TARFILE" -C "$DUMPDIR" --mode=+rw \
        --exclude="etc/alternatives" \
        --exclude="*/etc/passwd*" \
        --exclude="*/etc/shadow*" \
        --exclude="*/etc/group*" \
        --exclude="*/etc/gshadow*" \
        --exclude="*/etc/ssh*" \
        --exclude="*get_creds*" \
        --exclude="*snmpd.conf*" \
        --exclude="/etc/mlnx" \
        --exclude="/etc/mft" \
        "$BASE/etc" \
        || abort "${ERROR_TAR_FAILED}" "Tar append operation failed. Aborting for safety.") \
        && $RM $V -rf "$TARDIR"

    # Log rotation stays off while the logs are collected. The EXIT trap
    # re-enables it on any early exit; KILL is not listed because it cannot
    # be trapped.
    trap enable_logrotate EXIT HUP INT QUIT TERM ABRT ALRM
    disable_logrotate

    # gzip up all log files individually before placing them in the incremental tarball
    for file in $(find_files "/var/log/"); do
        # ignore the sparse file lastlog
        if [ "$file" = "/var/log/lastlog" ]; then
            continue
        fi
        # don't gzip already-gzipped log files :)
        if [[ "$file" == *.gz ]]; then
            save_file "$file" log false
        else
            save_file "$file" log true
        fi
    done

    enable_logrotate
    # Log rotation is back on; no need to run the handler again at exit
    trap - EXIT

    # archive core dump files
    for file in $(find_files "/var/core/"); do
        # don't gzip already-gzipped core files :)
        if [[ "$file" == *.gz ]]; then
            save_file "$file" core false
        else
            save_file "$file" core true
        fi
    done

    # archive kernel dump files
    for file in $(find_files "/var/crash/"); do
        if [[ "$file" != "/var/crash/kexec_cmd" && "$file" != "/var/crash/export" ]]; then
            # files with "kdump." in their path are stored as-is, the rest is gzipped
            if [[ "$file" == *"kdump."* ]]; then
                save_file "$file" kdump false
            else
                save_file "$file" kdump true
            fi
        fi
    done

    # run 'hw-management-generate-dump.sh' script and save the result file
    if [ -x /usr/bin/hw-management-generate-dump.sh ]; then
        /usr/bin/hw-management-generate-dump.sh
    fi
    # Expand the glob here so save_file only ever sees existing files
    for file in /tmp/hw-mgmt-dump*; do
        [ -e "$file" ] || continue
        save_file "$file" "hw-mgmt" false
    done
    rm -f /tmp/hw-mgmt-dump*

    # clean up working tar dir before compressing
    $RM $V -rf "$TARDIR"

    if $DO_COMPRESS; then
        if $GZIP $V "$TARFILE"; then
            TARFILE="${TARFILE}.gz"
        else
            echo "WARNING: gzip operation appears to have failed." >&2
        fi
    fi

    echo "${TARFILE}"
}

###############################################################################
# Prints a message on standard error and exits the current shell with the
# given code. Used to stop generate_dump early when something goes wrong.
# Globals:
#  None
# Arguments:
#  retcode: 0-255 return code to exit with. default is 1
#  msg: (OPTIONAL) msg to print to standard error
# Returns:
#  None
###############################################################################
abort() {
    local -r status=${1:-1}
    local -r text=${2:-Error. Terminating early for safety.}

    echo "${text}" 1>&2
    exit ${status}
}

###############################################################################
# Prints usage to stdout.
# The synopsis lists every option accepted by the option parser.
# Globals:
#  None
# Arguments:
#  None
# Returns:
#  None
###############################################################################
usage() {
    cat <<EOF
$0 [-xnvhz] [-s DATE]

Create a SONiC system dump for support/debugging. Requires root privileges.

OPTIONS
    -x
        Enable bash debug mode.
    -h
        The usage information you are reading right now
    -v
        Enable verbose mode. All commands (like tar, mkdir, rm..) will have -v
        passed to them
    -n
        Noop mode. Don't actually create anything, just echo what would happen
    -z
        Don't compress the tar at the end.
    -s DATE
        Collect logs since DATE;
        The argument is a mostly free format human readable string such as
        "24 March", "yesterday", etc.

EOF
}

###############################################################################
# Parses the command line options and updates the global settings.
# Globals:
#  PS4, V, NOOP, DO_COMPRESS, SINCE_DATE, CMD_PREFIX
#  TAR, MKDIR, RM, LN, GZIP, MV, CP, TOUCH
#  ERROR_INVALID_ARGUMENT
# Arguments:
#  the script's command line arguments
# Returns:
#  None. Exits 0 after -h, exits 1 on an unknown option and exits through
#  abort with ERROR_INVALID_ARGUMENT on a missing or invalid -s argument.
###############################################################################
parse_options() {
    local opt OPTARG OPTIND=1

    # The leading ':' selects silent error reporting: getopts then sets opt to
    # '?' for an unknown option and to ':' for a missing option argument.
    while getopts ":xnvhzs:" opt; do
        case $opt in
            x)
                # enable bash debugging
                # Single quotes: PS4 has to be expanded for every traced
                # command, not once at assignment time.
                PS4='+(${BASH_SOURCE}:${LINENO}): ${FUNCNAME[0]:+${FUNCNAME[0]}(): }'
                set -x
                ;;
            h)
                usage
                exit 0
                ;;
            v)
                # echo commands about to be run to stderr
                set -v
                V="-v"
                ;;
            n)
                # noop: every tool is replaced by an echo of its command line
                TAR="echo tar"
                MKDIR="echo mkdir"
                RM="echo rm"
                LN="echo ln"
                GZIP="echo gzip"
                CMD_PREFIX="echo "
                MV="echo mv"
                CP="echo cp"
                TOUCH="echo touch"
                NOOP=true
                ;;
            z)
                DO_COMPRESS=false
                ;;
            s)
                SINCE_DATE="${OPTARG}"
                # validate date expression
                date --date="${SINCE_DATE}" &> /dev/null || abort "${ERROR_INVALID_ARGUMENT}" "Invalid date expression passed: '${SINCE_DATE}'"
                ;;
            :)
                abort "${ERROR_INVALID_ARGUMENT}" "Option -${OPTARG} requires an argument"
                ;;
            \?)
                echo "Invalid option: -$OPTARG" >&2
                exit 1
                ;;
        esac
    done
}

parse_options "$@"

main
